library(tidyverse)

# Read the movies CSV into a data frame and list its column names.
movies <- read.csv(file = "archive/tmdb_5000_movies.csv")
colnames(movies)
 [1] "budget"               "genres"               "homepage"             "id"                  
 [5] "keywords"             "original_language"    "original_title"       "overview"            
 [9] "popularity"           "production_companies" "production_countries" "release_date"        
[13] "revenue"              "runtime"              "spoken_languages"     "status"              
[17] "tagline"              "title"                "vote_average"         "vote_count"          

1

First thoughts: homepage has missing values; revenue contains 0 values; there are few factor types; popularity is a number with decimals (shouldn’t it be a double?); and there are a lot of repetitive values.

2

# Subset of movies holding only the title, runtime and budget columns.
movies_selected <- select(movies, title, runtime, budget)

3

Ok, now take your subsetted data movies_selected, and count the number of missing values in each column.

4

# A runtime of 0 is treated as a missing value: recode it to NA.
movies_runtime <- movies %>%
  mutate(runtime = na_if(runtime, 0))

# Count the missing runtimes on the recoded data (movies_runtime) so that the
# zeros converted to NA above are included in the count.
movies_runtime %>%
  summarise(n_missing_runtime = sum(is.na(runtime)))

5

# Replace missing runtimes with the median runtime. Start from movies_runtime
# (where runtimes of 0 were recoded to NA) so those rows are imputed as well.
movies_imputed <- movies_runtime %>%
  mutate(runtime = coalesce(runtime, median(runtime, na.rm = TRUE)))

# Check the imputed data itself: no missing runtimes should remain.
movies_imputed %>%
  summarise(n_missing_runtime = sum(is.na(runtime)))

6

# Rows of movies_imputed with the lowest runtime values (n = 10).
slice_min(movies_imputed, order_by = runtime, n = 10)

7

Extension 1

2

Take the original dataset with all the variables. Using across and where, summarise the number of missing values, first across all columns of type character, and then across all columns of type numeric.

# Number of missing values in each numeric column: across() applies the NA
# count to every column selected by where(is.numeric), giving one row with
# one count per column.
movies %>%
  summarise(across(where(is.numeric), ~ sum(is.na(.x))))
`summarise()` regrouping output by 'budget', 'id', 'popularity', 'revenue', 'runtime', 'vote_average' (override with `.groups` argument)
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmxpYnJhcnkodGlkeXZlcnNlKQoKbW92aWVzIDwtIHJlYWQuY3N2KCJhcmNoaXZlL3RtZGJfNTAwMF9tb3ZpZXMuY3N2IikKYGBgCmBgYHtyfQpoZWFkKG1vdmllcykKYGBgCmBgYHtyfQpuYW1lcyhtb3ZpZXMpCmBgYAojIDEKRmlyc3QgdGhvdWdodHMKLSBob21lcGFnZSAtIG1pc3NpbmcgdmFsdWVzCi0gcmV2ZW51ZSAtIDAgdmFsdWUKLSBmZXcgZmFjdG9yIHR5cGVzCi0gcG9wdWxhcml0eSAtIG51bSB3aXRoIGRlY2ltYWwgc2hvdWxkbid0IGJlIGRvdWJsZT8KLSBsb3Qgb2YgcmVwZXRpdGl2ZSB2YWx1ZXMKCiMgMgoKYGBge3J9Cm1vdmllc19zZWxlY3RlZCA8LQptb3ZpZXMgJT4lIAogIHNlbGVjdCh0aXRsZSxydW50aW1lLCBidWRnZXQpCmBgYAojIDMKT2ssIG5vdyB0YWtlIHlvdXIgc3Vic2V0dGVkIGRhdGEgbW92aWVzX3NlbGVjdGVkLCBhbmQgY291bnQgdGhlIG51bWJlciBvZiBtaXNzaW5nIHZhbHVlcyBpbiBlYWNoIGNvbHVtbi4KCmBgYHtyfQojIGNvdW50IHRoZSBudW1iZXIgb2YgbWlzc2luZyB2YWx1ZXMgaW4gZWFjaCBjb2x1bW4KbW92aWVzX3NlbGVjdGVkICU+JSAKICBzdW1tYXJpc2UoY291bnQgPSBzdW0oaXMubmEobW92aWVzX3NlbGVjdGVkKSkpCmBgYAoKIyA0CgpgYGB7cn0KbW92aWVzICU+JSAKICBmaWx0ZXIocnVudGltZSA9PSAwKSAlPiUgCiAgc3VtbWFyaXNlKGNvdW50ID0gc3VtKHJ1bnRpbWUgPT0gMCkpCmBgYAoKCmBgYHtyfQptb3ZpZXNfcnVudGltZTwtCm1vdmllcyAlPiUKICBtdXRhdGUocnVudGltZSA9IG5hX2lmKHJ1bnRpbWUsIDApKQogIApgYGAKYGBge3J9Cm1vdmllcyAlPiUgCiAgc3VtbWFyaXNlKHN1bShpcy5uYShydW50aW1lKSkpCmBgYAoKIyA1CgpgYGB7cn0KbW92aWVzX2ltcHV0ZWQgPC0gbW92aWVzICU+JSAKICBtdXRhdGUocnVudGltZSA9IGNvYWxlc2NlKHJ1bnRpbWUsIG1lZGlhbihydW50aW1lLCBuYS5ybSA9IFRSVUUpKSkKCmBgYApgYGB7cn0KbW92aWVzICU+JSAKICBzdW1tYXJpc2Uoc3VtKGlzLm5hKHJ1bnRpbWUpKSkKYGBgCgojIDYgCgpgYGB7cn0KbW92aWVzX2ltcHV0ZWQgJT4lIAogIHNsaWNlX21heChydW50aW1lLCBuID0gMTApCmBgYApgYGB7cn0KbW92aWVzX2ltcHV0ZWQgJT4lIAogIHNsaWNlX21pbihydW50aW1lLCBuID0gMTApCmBgYAoKIyA3CgpgYGB7cn0KbW92aWVzJT4lIAogIG11dGF0ZShtb3ZpZXNfaW1wdXRlZCA9IGlmX2Vsc2UoYnVkZ2V0IDwgMTAwLCBtZWRpYW4oYnVkZ2V0KSwgYnVkZ2V0KSkKYGBgCgojIEV4dGVuc2lvbiAxCgpgYGB7cn0KbW92aWVfYnVkZ2V0cyA8LSBtb3ZpZXMgJT4lIAogIG11dGF0ZShidWRnZXRfdHlwZSA9IGNhc2Vfd2hlbihidWRnZXQgPCAxMmU2IH4gIlNtYWxsIGJ1ZGdldCIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGJ1ZGdldCA+IDEyZTYgJiBidWRnZXQgPCA0MGU2IH4gIk1lZGl1bSBidWRn
ZXQiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBidWRnZXQgPiA0MGU2IH4gIkJpZyBidWRnZXQiKSkKbW92aWVfYnVkZ2V0cwpgYGAKIyAyClRha2UgdGhlIG9yaWdpbmFsIGRhdGFzZXQgd2l0aCBhbGwgdGhlIHZhcmlhYmxlcy4gVXNpbmcgYWNyb3NzIGFuZCB3aGVyZSwgc3VtbWFyaXNlIHRoZSBudW1iZXIgb2YgbWlzc2luZyB2YWx1ZXMsIGZpcnN0IGFjcm9zcyBhbGwgY29sdW1ucyBvZiB0eXBlIGNoYXJhY3RlciwgYW5kIHRoZW4gYWNyb3NzIGFsbCBjb2x1bW5zIG9mIHR5cGUgbnVtZXJpYy4KYGBge3J9Cm1vdmllcyAlPiUgCiAgZ3JvdXBfYnkoYWNyb3NzKHdoZXJlKGlzLmNoYXJhY3RlcikpKSAlPiUgCiAgc3VtbWFyaXNlKHN1bShpcy5uYSA9IFQpKQpgYGAKYGBge3J9Cm1vdmllcyAlPiUgCiAgZ3JvdXBfYnkoYWNyb3NzKHdoZXJlKGlzLm51bWVyaWMpKSkgJT4lIAogIHN1bW1hcmlzZShzdW0oaXMubmEgPSBUKSkKYGBgCgo=